## Warning: replacing previous import 'GenomicRanges::shift' by 'data.table::shift'
## when loading 'plgINS'

enrichMiR

#' getTS
#'
#' @param species character object. Can be "human", "mouse" or "rat"
#'
#' @return TargetScan miRNA target dataframe with family information in metadata()
#'
getTS <- function(species=c("human","mouse","rat")){
  library(S4Vectors)
  species <- match.arg(species)
  
  # assign species ID
  spec <- switch( species,
                  human = 9606,
                  mouse = 10090,
                  rat = 10116 )
  
  
  # download TargetScan miRNA targeting dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/Summary_Counts.all_predictions.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("Summary_Counts.all_predictions.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  sub$score <- as.numeric(as.character(sub$'Cumulative weighted context++ score'))
    
  # generate TargetScan dataframe
  ts <- DataFrame(family = sub$'miRNA family',
                   rep.miRNA = sub$'Representative miRNA',
                   feature = sub$'Gene Symbol',
                   sites = sub$'Total num conserved sites',
                   score = as.numeric(as.character(sub$'Cumulative weighted context++ score'))
                   )
  ts <- DataFrame(
    aggregate(sub[,c("sites","score")], by=ts[,c("family","feature")], FUN=mean)
    )
  

  
  # download TargetScan miRNA families dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/miR_Family_Info.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("miR_Family_Info.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  
  fam <- sub$`Seed+m8`
  names(fam) <- sub$`MiRBase ID`
  
  # add family info to ts dataframe as attribute
  metadata(ts)$families <- fam
  
  # enrichMiR cant handle 0 values for sites feature
  return(ts[ts$sites!=0,])
}
tests <- c("areamir","overlap","michael","KS","KS2","MW")
# all tests except WO:
tests <- c("overlap","michael","mw","ks","ks2","areamir","areamir2")
# all tests
tests <- c("overlap","michael","wo","mw","ks","ks2","gsea","gseamod","modscore","modsites","areamir","areamir2")
tests <- NULL
cores <- 8

# run enrichMiR on all objects of dea.list (high runtime!)

## foreach: doesnt work
library(foreach)
library(doParallel)
# cl <- makeCluster(cores)
# registerDoParallel(cl, cores=cores)
# chunk.size <- length(dea.names)/cores
# test <- foreach(c=1:cores, .combine="rbind") %dopar% {
#   for(i in dea.names[((c-1)*chunk.size+1):(c*chunk.size)]){
#     lapply(names(props.all), FUN=function(j){
#       enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
#                 cleanNames=TRUE, tests=tests)
#       })
#     }
# }
#stopImplicitCluster()
# stopCluster(cl)

## foreach: Pierre's code (works, but 1 core)
res <- foreach( dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE) ) %dopar% {
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                }

## mclapply (works, but 1 core; all cores when reduced tests var)
## looks like GSEA, WO, mods are problematic
library(parallel)
# multicores on Linux
e.list <- mclapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  }, mc.cores = cores )
## mcmapply: inspired by Pierre's code
res <- mcmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                  }, mc.cores = cores )


## bplapply: doesnt work at all
e.list <- bplapply(dea.names[1:4], FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr,
                cleanNames=TRUE, tests=tests)
      })
  }, BPPARAM = MulticoreParam(cores, progressbar = TRUE) )


## bpmapply: Pierre's code
res <- bpmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                BPPARAM=MulticoreParam(4, threshold="TRACE"), FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, cleanNames=TRUE, tests=tests )
                  } )


## serial run
e.list <- lapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  })


# naming
names(e.list) <- dea.names
for(i in dea.names){
  names(e.list[[i]]) <- names(props.all)
}

# use this between parallel runs
gc(verbose = T)

Benchmarking

Plots

## Score analysis:  detPPV

## Score analysis:  FP.atFDR05

## Score analysis:  log10QDiff

## Score analysis:  TP.atFDR05

scores over the datasets

## $`miR-122`
## original       20       30       40       50 
##    0.824    0.824    0.825    0.790    0.678 
## 
## $`miR-138`
## original       20       30       40       50 
##    0.801    0.809    0.824    0.798    0.732 
## 
## $`miR-145`
## original       20       30       40       50 
##    0.767    0.703    0.683    0.655    0.519 
## 
## $`miR-184`
## original       20       30       40       50 
##    0.568    0.525    0.467    0.485    0.375 
## 
## $`miR-190a`
## original       20       30       40       50 
##    0.771    0.761    0.715    0.706    0.481 
## 
## $`miR-200b`
## original       20       30       40       50 
##    0.801    0.801    0.801    0.795    0.749 
## 
## $`miR-216a`
## original       20       30       40       50 
##    0.656    0.453    0.269    0.158    0.109 
## 
## $`miR-217`
## original       20       30       40       50 
##    0.729    0.709    0.670    0.538    0.287 
## 
## $`miR-219a`
## original       20       30       40       50 
##    0.795    0.824    0.805    0.805    0.805 
## 
## $`miR-375`
## original       20       30       40       50 
##    0.795    0.757    0.759    0.596    0.563 
## 
## $`miR-451a`
## original       20       30       40       50 
##    0.648    0.619    0.601    0.431    0.463

scores over methods

## $aREAmir
## original       20       30       40       50 
##    0.955    0.955    0.876    0.794    0.609 
## 
## $EN.up
## original       20       30       40       50 
##    0.013    0.013    0.012    0.013    0.012 
## 
## $EN.down
## original       20       30       40       50 
##    1.000    0.939    0.906    0.795    0.577 
## 
## $EN.combined
## original       20       30       40       50 
##    1.000    0.939    0.899    0.826    0.715 
## 
## $wEN.up
## original       20       30       40       50 
##    0.006    0.006    0.012    0.007    0.007 
## 
## $wEN.down
## original       20       30       40       50 
##    1.000    1.000    0.955    0.912    0.851 
## 
## $wEN.combined
## original       20       30       40       50 
##    1.000    1.000    0.926    0.922    0.901 
## 
## $michael.up
## original       20       30       40       50 
##    0.006    0.007    0.014    0.007    0.007 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    1.000    0.957    0.914    0.894 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    1.000    0.951    0.864    0.743 
## 
## $MW
## original       20       30       40       50 
##    0.865    0.786    0.770    0.630    0.439 
## 
## $KS
## original       20       30       40       50 
##    0.716    0.691    0.658    0.538    0.364 
## 
## $KS2
## original       20       30       40       50 
##    1.000    0.955    0.947    0.878    0.786 
## 
## $GSEA
## original       20       30       40       50 
##    0.528    0.481    0.477    0.493    0.453 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.447    0.380    0.400    0.350    0.314 
## 
## $modSites
## original       20       30       40       50 
##    0.955    0.867    0.798    0.724    0.581 
## 
## $modScore
## original       20       30       40       50 
##    1.000    0.902    0.841    0.731    0.644

TP ratio at FDR 0.05 over methods

## $aREAmir
## original       20       30       40       50 
##    0.818    0.667    0.545    0.515    0.303 
## 
## $EN.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $EN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $EN.combined
## original       20       30       40       50 
##    1.000    1.000    0.970    0.970    0.939 
## 
## $wEN.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $wEN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $wEN.combined
## original       20       30       40       50 
##    1.000    1.000    0.970    0.970    0.909 
## 
## $michael.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    1.000    0.970    0.939    0.818 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    1.000    0.970    0.909    0.727 
## 
## $MW
## original       20       30       40       50 
##     1.00     1.00     1.00     1.00     0.97 
## 
## $KS
## original       20       30       40       50 
##     1.00     1.00     1.00     1.00     0.97 
## 
## $KS2
## original       20       30       40       50 
##    0.909    0.909    0.818    0.758    0.576 
## 
## $GSEA
## original       20       30       40       50 
##    0.091    0.121    0.182    0.212    0.303 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.364    0.364    0.485    0.545    0.576 
## 
## $modSites
## original       20       30       40       50 
##    1.000    1.000    1.000    1.000    0.879 
## 
## $modScore
## original       20       30       40       50 
##    1.000    1.000    1.000    1.000    0.879